"mean(", i , "$pvalue_zero, na.rm = TRUE)")
eval(parse(text = todo))
# Set to na
todo <- paste0("summaries$beta_na[summaries$varname == '", i, "'] <- ",
"mean(", i , "$beta_na, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$pvalue_na[summaries$varname == '", i, "'] <- ",
"mean(", i , "$pvalue_na, na.rm = TRUE)")
eval(parse(text = todo))
# First order markov w/ incidence as LHS
todo <- paste0("summaries$beta_mark[summaries$varname == '", i, "'] <- ",
"mean(", i , "$beta_mark, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$pvalue_mark[summaries$varname == '", i, "'] <- ",
"mean(", i , "$pvalue_mark, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$beta_d_mark[summaries$varname == '", i, "'] <- ",
"mean(", i , "$beta_d_mark, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$pvalue_d_mark[summaries$varname == '", i, "'] <- ",
"mean(", i , "$pvalue_d_mark, na.rm = TRUE)")
eval(parse(text = todo))
####### Weighted means (by McFadden's pseudo-R2) #######
# Set to zero
todo <- paste0("summaries$beta_w_zero[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$beta_zero, ", i , "$fit_zero, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$pvalue_w_zero[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$pvalue_zero, ", i , "$fit_zero, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$se_w_zero[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$se_zero, ", i , "$fit_zero, na.rm = TRUE)")
eval(parse(text = todo))
# Set to NA
todo <- paste0("summaries$beta_w_na[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$beta_na, ", i , "$fit_na, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$pvalue_w_na[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$pvalue_na, ", i , "$fit_na, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$se_w_na[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$se_na, ", i , "$fit_na, na.rm = TRUE)")
eval(parse(text = todo))
# First order markov with incidence
todo <- paste0("summaries$beta_w_mark[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$beta_mark, ", i , "$fit_mark, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$pvalue_w_mark[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$pvalue_mark, ", i , "$fit_mark, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$se_w_mark[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$se_mark, ", i , "$fit_mark, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$beta_d_w_mark[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$beta_d_mark, ", i , "$fit_mark, na.rm = TRUE)")
eval(parse(text = todo))
todo <- paste0("summaries$pvalue_d_w_mark[summaries$varname == '", i, "'] <- ",
"weighted.mean(", i , "$pvalue_d_mark, ", i , "$fit_mark, na.rm = TRUE)")
eval(parse(text = todo))
}
summaries$varconc <- varconc
summary(summaries)
rm(list = ls())
################################################################################
# Code for the analysis of the Monte Carlos
# Estimating Onsets of Binary Events in Panel Data
# Liam F. McGrath
#
# Purpose: This code analyses the results of the monte carlos conducted.
#          The results of the monte carlos are generated by mc_run.R
#          and are contained in collected_props.csv in the output sub folder.
#          Also required is the file scenarios.csv which contains information
#          on each scenario considered in the monte carlos.
#          This file generates figures 1-5 and table 3 in the main text, plus
#          figures 7-9 in the appendix.
#
rm(list = ls())
library(ggthemes)
library(splines)
library(xtable)
# IMPORTANT:
# Define the path which all the code, data and results are located within
# From this changes in working directory are formed by pasting this
# string with a string for the relevant subfolder.
# If using this code be sure to change to this to your own directory
dir <- "/Users/liammcgrath/Documents/Academic/Research/transition/replication_materials/"
setwd(paste0(dir, "code/"))
# Load in function for combine plots
source("multiplot.R")
## Load in this for analysis
setwd(paste0(dir, "output/"))
setwd(paste0(dir, "output/"))
newdata <- read.csv("collected_props.csv")
scenarios <- read.csv("scenarios.csv")
# Add some scenario info to the MC results
newdata$d <- scenarios$d
newdata$a <- scenarios$a
rm(list = ls())
library(ggthemes)
library(splines)
library(xtable)
# IMPORTANT:
# Define the path which all the code, data and results are located within
# From this changes in working directory are formed by pasting this
# string with a string for the relevant subfolder.
# If using this code be sure to change to this to your own directory
dir <- "/Users/liammcgrath/Documents/Academic/Research/transition/replication_materials/"
setwd(paste0(dir, "code/"))
# Load in function for combine plots
source("multiplot.R")
## Load in results of monte carlos and info on scenarios
setwd(paste0(dir, "output/"))
newdata <- read.csv("collected_props.csv")
scenarios <- read.csv("scenarios.csv")
# Add some scenario info to the MC results
newdata$d <- scenarios$d
newdata$a <- scenarios$a
# Information on number of observations' outcomes coded to 0
newdata$prop_y_recode <- (newdata$mean_y - newdata$mean_y_trans) / newdata$mean_y
newdata$prop_n_recode <- newdata$mean_y - newdata$mean_y_trans
# Create bias results of estimate for each model
# First as we are focusing on bias relative to beta
# only look at MCs where beta does not equal zero (can't divide by zero)
newdata <- newdata[newdata$b != 0,]
newdata$bias_model1 <- (newdata$mean_b_model1 - newdata$b) / (newdata$b)
newdata$bias_model2 <- (newdata$mean_b_model2 - newdata$b) / (newdata$b)
newdata$bias_model3 <- (newdata$mean_b_model3 - newdata$b) / (newdata$b)
newdata$bias_model4 <- (newdata$mean_b_model4 - newdata$b) / (newdata$b)
# Define the categories of y used in figure 5
newdata$y_category <- NA
newdata$y_category[newdata$mean_y <=0.05] <- "0.00 < y <= 0.05"
newdata$y_category[newdata$mean_y > 0.05 & newdata$mean_y <=0.1] <- "0.05 < y <= 0.10"
newdata$y_category[newdata$mean_y > 0.10 & newdata$mean_y <=0.15] <- "0.10 < y < 0.15"
newdata$y_category[newdata$mean_y > 0.15 & newdata$mean_y <=0.2] <- "0.15 < y <= 0.20"
newdata$y_category[newdata$mean_y > 0.2 & newdata$mean_y <= 0.25] <- "0.20 < y <= 0.25 "
newdata_r <- newdata[newdata$mean_y <= 0.25,] # "data restricted"
# First change data format, so that can use facets argument in ggplot
long <- data.frame(model = c(rep("Model 1", nrow(newdata_r)),
rep("Model 2", nrow(newdata_r)),
rep("Model 3", nrow(newdata_r)),
rep("Model 4", nrow(newdata_r))
)
)
long$bias <- c(newdata_r$bias_model1, newdata_r$bias_model2, newdata_r$bias_model3,
newdata_r$bias_model4)
long$ci_prop <- c(newdata_r$ci_prop_model_1, newdata_r$ci_prop_model_2,
newdata_r$ci_prop_model_3, newdata_r$ci_prop_model4)
long$prop_y_recode <- rep(newdata_r$prop_y_recode, 4)
long$prop_n_recode <- rep(newdata_r$prop_n_recode, 4)
long$rmse <- c(newdata_r$rmse_b_model1, newdata_r$rmse_b_model2, newdata_r$rmse_b_model3,
newdata_r$rmse_b_model4) /
rep(abs(newdata_r$b), 4) # relative to beta coefficient
long$b <- rep(newdata_r$b, 4)
long$g <- rep(newdata_r$g, 4)
long$a <- rep(newdata_r$a, 4)
long$d <- rep(newdata_r$d, 4)
bias_output <- data.frame(indic = c("Mean of Bias (%)", "Mean of 95% CI Non-Coverage",
"Mean of $y$", "Mean of Recoded $y$")
)
bias_output$model1 <- NA
bias_output$model2 <- NA
bias_output$model3 <- NA
bias_output$model4 <- NA
bias_output[1, 2:5] <- tapply(long$bias * 100, long$model, mean)
bias_output[2, 2:5] <- tapply(100 * (long$ci_prop - 0.95) / 0.95, long$model, mean)
bias_output[3, 2:5] <- rep(mean(newdata_r$mean_y) , 4)
bias_output[4, 2:5] <- rep(mean(newdata_r$mean_y_trans) , 4)
bias_output
fig1_b <- qplot(data = long, y = bias * 100, x = mean_y,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of y = 1 in the Sample") +
ylab("Bias (%)")
# CI Prop
fig1_ci <- qplot(data = long, y = 100 * (ci_prop - 0.95) / 0.95, x = mean_y,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of y = 1 in the Sample") +
ylab("95% Confidence Interval Non-Coverage")
multiplot(fig1_b, fig1_ci)
rm(list = ls())
library(ggthemes)
library(splines)
library(xtable)
# IMPORTANT:
# Define the path which all the code, data and results are located within
# From this changes in working directory are formed by pasting this
# string with a string for the relevant subfolder.
# If using this code be sure to change to this to your own directory
dir <- "/Users/liammcgrath/Documents/Academic/Research/transition/replication_materials/"
setwd(paste0(dir, "code/"))
# Load in function for combine plots
source("multiplot.R")
## Load in results of monte carlos and info on scenarios
setwd(paste0(dir, "output/"))
newdata <- read.csv("collected_props.csv")
scenarios <- read.csv("scenarios.csv")
# Add some scenario info to the MC results
newdata$d <- scenarios$d
newdata$a <- scenarios$a
# Information on number of observations' outcomes coded to 0
newdata$prop_y_recode <- (newdata$mean_y - newdata$mean_y_trans) / newdata$mean_y
newdata$prop_n_recode <- newdata$mean_y - newdata$mean_y_trans
# Create bias results of estimate for each model
# First as we are focusing on bias relative to beta
# only look at MCs where beta does not equal zero (can't divide by zero)
newdata <- newdata[newdata$b != 0,]
newdata$bias_model1 <- (newdata$mean_b_model1 - newdata$b) / (newdata$b)
newdata$bias_model2 <- (newdata$mean_b_model2 - newdata$b) / (newdata$b)
newdata$bias_model3 <- (newdata$mean_b_model3 - newdata$b) / (newdata$b)
newdata$bias_model4 <- (newdata$mean_b_model4 - newdata$b) / (newdata$b)
# Define the categories of y used in figure 5
newdata$y_category <- NA
newdata$y_category[newdata$mean_y <=0.05] <- "0.00 < y <= 0.05"
newdata$y_category[newdata$mean_y > 0.05 & newdata$mean_y <=0.1] <- "0.05 < y <= 0.10"
newdata$y_category[newdata$mean_y > 0.10 & newdata$mean_y <=0.15] <- "0.10 < y < 0.15"
newdata$y_category[newdata$mean_y > 0.15 & newdata$mean_y <=0.2] <- "0.15 < y <= 0.20"
newdata$y_category[newdata$mean_y > 0.2 & newdata$mean_y <= 0.25] <- "0.20 < y <= 0.25 "
#######
# ANALYSIS OF MONTE CARLO RESULTS
#######
## For the results in the paper only look at scenarios where
## the proportion of y = 1 in the sample is less than or equal to 0.25
## Results for the full sample occur later on.
newdata_r <- newdata[newdata$mean_y <= 0.25,] # "data restricted"
# First change data format, so that can use facets argument in ggplot
long <- data.frame(model = c(rep("Model 1", nrow(newdata_r)),
rep("Model 2", nrow(newdata_r)),
rep("Model 3", nrow(newdata_r)),
rep("Model 4", nrow(newdata_r))
)
)
long$bias <- c(newdata_r$bias_model1, newdata_r$bias_model2, newdata_r$bias_model3,
newdata_r$bias_model4)
long$ci_prop <- c(newdata_r$ci_prop_model_1, newdata_r$ci_prop_model_2,
newdata_r$ci_prop_model_3, newdata_r$ci_prop_model4)
long$prop_y_recode <- rep(newdata_r$prop_y_recode, 4)
long$prop_n_recode <- rep(newdata_r$prop_n_recode, 4)
long$rmse <- c(newdata_r$rmse_b_model1, newdata_r$rmse_b_model2, newdata_r$rmse_b_model3,
newdata_r$rmse_b_model4) /
rep(abs(newdata_r$b), 4) # relative to beta coefficient
long$b <- rep(newdata_r$b, 4)
long$g <- rep(newdata_r$g, 4)
long$a <- rep(newdata_r$a, 4)
long$d <- rep(newdata_r$d, 4)
long$mean_y <- rep(newdata_r$mean_y, 4)
long$mean_y_trans <-rep(newdata_r$mean_y_trans, 4)
bias_output <- data.frame(indic = c("Mean of Bias (%)", "Mean of 95% CI Non-Coverage",
"Mean of $y$", "Mean of Recoded $y$")
)
bias_output$model1 <- NA
bias_output$model2 <- NA
bias_output$model3 <- NA
bias_output$model4 <- NA
bias_output[1, 2:5] <- tapply(long$bias * 100, long$model, mean)
bias_output[2, 2:5] <- tapply(100 * (long$ci_prop - 0.95) / 0.95, long$model, mean)
bias_output[3, 2:5] <- rep(mean(newdata_r$mean_y) , 4)
bias_output[4, 2:5] <- rep(mean(newdata_r$mean_y_trans) , 4)
fig1_b <- qplot(data = long, y = bias * 100, x = mean_y,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of y = 1 in the Sample") +
ylab("Bias (%)")
# CI Prop
fig1_ci <- qplot(data = long, y = 100 * (ci_prop - 0.95) / 0.95, x = mean_y,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of y = 1 in the Sample") +
ylab("95% Confidence Interval Non-Coverage")
multiplot(fig1_b, fig1_ci)
fig2_b <- qplot(data = long, y = bias * 100, x = prop_n_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Observations Recoded") +
ylab("Bias (%)")
# CI Prop
fig2_ci <- qplot(data = long, y = 100 * (ci_prop - 0.95) / 0.95, x = prop_n_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Observations Recoded") +
ylab("95% Confidence Interval Non-Coverage")
multiplot(fig2_b, fig2_ci)
fig3_b <- qplot(data = long, y = bias * 100, x = prop_n_recode,
facets = . ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Observations Recoded") +
ylab("Bias (%)")
# CI Prop
fig3_ci <- qplot(data = long, y = 100 * (ci_prop - 0.95) / 0.95, x = prop_n_recode,
facets = . ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Observations Recoded") +
ylab("95% Confidence Interval Non-Coverage")
multiplot(fig3_b, fig3_ci)
fig4_b <- qplot(data = long, y = bias * 100, x = prop_y_recode,
facets = . ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) +
geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("Bias (%)")
# CI Prop
fig4_ci <- qplot(data = long, y = 100 * (ci_prop - 0.95) / 0.95, x = prop_y_recode,
facets = . ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) +
geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("95% Confidence Interval Non-Coverage")
multiplot(fig4_b, fig4_ci)
fig5_b <- qplot(data = long, y = bias * 100, x = prop_y_recode,
facets = y_category ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) +
geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("Bias (%)")
# CI Prop
fig5_ci <- qplot(data = long, y = 100 * (ci_prop - 0.95) / 0.95, x = prop_y_recode,
facets = y_category ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) +
geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("95% Confidence Interval Non-Coverage")
multiplot(fig5_b, fig5_ci)
qplot(data = long, y = bias * 100, x = prop_y_recode,
facets = y_category ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) +
geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("Bias (%)")
rm(list = ls())
library(ggthemes)
library(splines)
library(xtable)
# IMPORTANT:
# Define the path which all the code, data and results are located within
# From this changes in working directory are formed by pasting this
# string with a string for the relevant subfolder.
# If using this code be sure to change to this to your own directory
dir <- "/Users/liammcgrath/Documents/Academic/Research/transition/replication_materials/"
setwd(paste0(dir, "code/"))
# Load in function for combine plots
source("multiplot.R")
## Load in results of monte carlos and info on scenarios
setwd(paste0(dir, "output/"))
newdata <- read.csv("collected_props.csv")
scenarios <- read.csv("scenarios.csv")
# Add some scenario info to the MC results
newdata$d <- scenarios$d
newdata$a <- scenarios$a
# Information on number of observations' outcomes coded to 0
newdata$prop_y_recode <- (newdata$mean_y - newdata$mean_y_trans) / newdata$mean_y
newdata$prop_n_recode <- newdata$mean_y - newdata$mean_y_trans
# Create bias results of estimate for each model
# First as we are focusing on bias relative to beta
# only look at MCs where beta does not equal zero (can't divide by zero)
newdata <- newdata[newdata$b != 0,]
newdata$bias_model1 <- (newdata$mean_b_model1 - newdata$b) / (newdata$b)
newdata$bias_model2 <- (newdata$mean_b_model2 - newdata$b) / (newdata$b)
newdata$bias_model3 <- (newdata$mean_b_model3 - newdata$b) / (newdata$b)
newdata$bias_model4 <- (newdata$mean_b_model4 - newdata$b) / (newdata$b)
# Define the categories of y used in figure 5
newdata$y_category <- NA
newdata$y_category[newdata$mean_y <=0.05] <- "0.00 < y <= 0.05"
newdata$y_category[newdata$mean_y > 0.05 & newdata$mean_y <=0.1] <- "0.05 < y <= 0.10"
newdata$y_category[newdata$mean_y > 0.10 & newdata$mean_y <=0.15] <- "0.10 < y < 0.15"
newdata$y_category[newdata$mean_y > 0.15 & newdata$mean_y <=0.2] <- "0.15 < y <= 0.20"
newdata$y_category[newdata$mean_y > 0.2 & newdata$mean_y <= 0.25] <- "0.20 < y <= 0.25 "
#######
# ANALYSIS OF MONTE CARLO RESULTS
#######
## For the results in the paper only look at scenarios where
## the proportion of y = 1 in the sample is less than or equal to 0.25
## Results for the full sample occur later on.
newdata_r <- newdata[newdata$mean_y <= 0.25,] # "data restricted"
# First change data format, so that can use facets argument in ggplot
long <- data.frame(model = c(rep("Model 1", nrow(newdata_r)),
rep("Model 2", nrow(newdata_r)),
rep("Model 3", nrow(newdata_r)),
rep("Model 4", nrow(newdata_r))
)
)
long$bias <- c(newdata_r$bias_model1, newdata_r$bias_model2, newdata_r$bias_model3,
newdata_r$bias_model4)
long$ci_prop <- c(newdata_r$ci_prop_model_1, newdata_r$ci_prop_model_2,
newdata_r$ci_prop_model_3, newdata_r$ci_prop_model4)
long$prop_y_recode <- rep(newdata_r$prop_y_recode, 4)
long$prop_n_recode <- rep(newdata_r$prop_n_recode, 4)
long$rmse <- c(newdata_r$rmse_b_model1, newdata_r$rmse_b_model2, newdata_r$rmse_b_model3,
newdata_r$rmse_b_model4) /
rep(abs(newdata_r$b), 4) # relative to beta coefficient
long$b <- rep(newdata_r$b, 4)
long$g <- rep(newdata_r$g, 4)
long$a <- rep(newdata_r$a, 4)
long$d <- rep(newdata_r$d, 4)
long$mean_y <- rep(newdata_r$mean_y, 4)
long$mean_y_trans <-rep(newdata_r$mean_y_trans, 4)
long$y_category <- rep(newdata_r$y_category, 4)
# save(long, file = "long_results.RData")
####
# Table 3: Summarising the overal biases
#####
bias_output <- data.frame(indic = c("Mean of Bias (%)", "Mean of 95% CI Non-Coverage",
"Mean of $y$", "Mean of Recoded $y$")
)
bias_output$model1 <- NA
bias_output$model2 <- NA
bias_output$model3 <- NA
bias_output$model4 <- NA
bias_output[1, 2:5] <- tapply(long$bias * 100, long$model, mean)
bias_output[2, 2:5] <- tapply(100 * (long$ci_prop - 0.95) / 0.95, long$model, mean)
bias_output[3, 2:5] <- rep(mean(newdata_r$mean_y) , 4)
bias_output[4, 2:5] <- rep(mean(newdata_r$mean_y_trans) , 4)
fig5_b <- qplot(data = long, y = bias * 100, x = prop_y_recode,
facets = y_category ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) +
geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("Bias (%)")
# CI Prop
fig5_ci <- qplot(data = long, y = 100 * (ci_prop - 0.95) / 0.95, x = prop_y_recode,
facets = y_category ~ model, alpha = I(0.2),
group = abs(b), colour = abs(b)) +
geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("95% Confidence Interval Non-Coverage")
multiplot(fig5_b, fig5_ci)
warnings()
long_full <- data.frame(model = c(rep("Model 1", nrow(newdata)),
rep("Model 2", nrow(newdata)),
rep("Model 3", nrow(newdata)),
rep("Model 4", nrow(newdata))
)
)
long_full$bias <- c(newdata$bias_model1, newdata$bias_model2, newdata$bias_model3,
newdata$bias_model4)
long_full$ci_prop <- c(newdata$ci_prop_model_1, newdata$ci_prop_model_2,
newdata$ci_prop_model_3, newdata$ci_prop_model4)
long_full$prop_y_recode <- rep(newdata$prop_y_recode, 4)
long_full$prop_n_recode <- rep(newdata$prop_n_recode, 4)
long_full$rmse <- c(newdata$rmse_b_model1, newdata$rmse_b_model2, newdata$rmse_b_model3,
newdata$rmse_b_model4) /
rep(abs(newdata$b), 4) # relative to beta coefficient
long_full$b <- rep(newdata$b, 4)
long_full$g <- rep(newdata$g, 4)
long_full$a <- rep(newdata$a, 4)
long_full$d <- rep(newdata$d, 4)
long_full$mean_y <- rep(newdata$mean_y, 4)
long_full$mean_y_trans <-rep(newdata$mean_y_trans, 4)
long_full$y_category <- rep(newdata$y_category, 4)
### Graphs for appendix
figa1_b <- qplot(data = long_full, y = bias * 100, x = mean_y,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of y = 1 in the Sample") +
ylab("Bias (%)")
# CI Prop
figa1_ci <- qplot(data = long_full, y = 100 * (ci_prop - 0.95) / 0.95, x = mean_y,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of y = 1 in the Sample") +
ylab("95% Confidence Interval Non-Coverage")
# RMSE
figa1_r <- qplot(data = long_full, y = rmse, x = mean_y,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of y = 1 in the Sample") +
ylab("Root Mean Squared Error")
multiplot(figa1_b, figa1_ci, figa1_r)
figa2_b <- qplot(data = long_full, y = bias * 100, x = prop_n_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Observations Recoded") +
ylab("Bias (%)")
# CI Prop
figa2_ci <- qplot(data = long_full, y = 100 * (ci_prop - 0.95) / 0.95, x = prop_n_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Observations Recoded") +
ylab("95% Confidence Interval Non-Coverage")
# RMSE
figa2_r <- qplot(data = long_full, y = rmse, x = prop_n_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Observations Recoded") +
ylab("Root Mean Squared Error")
multiplot(figa2_b, figa2_ci, figa2_r)
figa3_b <- qplot(data = long_full, y = bias * 100, x = prop_y_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("Bias (%)")
# CI Prop
figa3_ci <- qplot(data = long_full, y = 100 * (ci_prop - 0.95) / 0.95, x = prop_y_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("95% Confidence Interval Non-Coverage")
# RMSE
figa3_r <- qplot(data = long_full, y = rmse, x = prop_y_recode,
facets = . ~ model, alpha = I(0.2)) + geom_smooth() +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() + xlab("Proportion of Dependent Variable Recoded") +
ylab("Root Mean Squared Error")
multiplot(figa3_b, figa3_ci, figa3_r)
